package bhwz.seac3.bl.import_.crawler;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.function.Predicate;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.Test;

import bhwz.seac3.vo.MatchVo;
import bhwz.seac3.vo.PlayerScoreTableVo;

/**
 * Parses downloaded HTML with jsoup: collects the links of a page and converts
 * a box-score page into a {@link MatchVo}.
 */
public class HtmlParser {
	/** Pattern of the date text that follows the first comma of the page heading. */
	private static final String MATCH_DATE_PATTERN = "MMM dd,yyyy";

	/**
	 * Number of collected values (cell texts plus the href of any link found in
	 * a cell) that a table row must yield to be read as a player's stat line.
	 */
	private static final int PLAYER_ROW_FIELD_COUNT = 22;

	/**
	 * Collects the absolute URLs of every link on the page.
	 *
	 * @param html
	 *            page markup
	 * @param baseUrl
	 *            URL used to resolve relative links
	 * @return absolute URLs of all {@code a[href]} elements
	 */
	public Set<String> getLinks(String html, String baseUrl) {
		return filterLinks(html, baseUrl, "", s -> true);
	}

	/**
	 * Collects the links of the page whose visible text satisfies a condition.
	 *
	 * @param html
	 *            page markup
	 * @param baseUrl
	 *            URL used to resolve relative links
	 * @param prefix
	 *            CSS selector fragment prepended verbatim to {@code a[href]}
	 * @param p
	 *            filter condition, applied to the link text
	 * @return absolute URLs of the accepted links; links whose URL cannot be
	 *         made absolute are left out
	 */
	public Set<String> filterLinks(String html, String baseUrl, String prefix,
			Predicate<String> p) {
		Set<String> result = new HashSet<>();
		Document doc = Jsoup.parse(html, baseUrl);
		for (Element link : doc.select(prefix + "a[href]")) {
			if (!p.test(link.text())) {
				continue;
			}
			String absoluteUrl = link.attr("abs:href");
			// jsoup yields "" when the href cannot be resolved against the base
			// URL; an empty string is not a usable link.
			if (!absoluteUrl.isEmpty()) {
				result.add(absoluteUrl);
			}
		}
		return result;
	}

	/**
	 * Builds a {@link MatchVo} from a box-score page: date, season, the two
	 * teams, per-section scores, final scores and every player's stat line.
	 *
	 * @param html
	 *            page markup
	 * @param baseUrl
	 *            URL used to resolve relative links
	 * @return the populated match
	 * @throws IllegalArgumentException
	 *             if the heading holds no parsable date, the teams cell is
	 *             missing or names fewer than two teams, fewer than two score
	 *             cells are found, or a score cell is not an integer
	 *             ({@link NumberFormatException})
	 */
	public MatchVo encapsulateMatchVo(String html, String baseUrl) {
		MatchVo vo = new MatchVo();
		Document doc = Jsoup.parse(html, baseUrl);

		// The date is whatever follows the first comma of the heading.
		String h1 = doc.getElementsByTag("h1").text();
		String date = h1.substring(h1.indexOf(',') + 1).trim();
		try {
			vo.setDate(new SimpleDateFormat(MATCH_DATE_PATTERN, Locale.US)
					.parse(date).getTime());
		} catch (ParseException e) {
			// Continuing would silently derive the season from an unset date.
			throw new IllegalArgumentException(
					"Cannot parse match date from page heading: '" + h1 + "'",
					e);
		}
		vo.setSeason(seasonOf(vo.getDate()));

		// Teams: the cell text is split on single spaces, first two tokens used.
		Element teamsLine = doc.select("td.align_center.background_yellow")
				.first();
		if (teamsLine == null) {
			throw new IllegalArgumentException(
					"No teams cell (td.align_center.background_yellow) in page");
		}
		Element td = teamsLine.getElementsByTag("td").first();
		String[] teams = td.text().split(" ");
		if (teams.length < 2) {
			throw new IllegalArgumentException(
					"Expected two team names but found: '" + td.text() + "'");
		}
		vo.setTeam1(teams[0]);
		vo.setTeam2(teams[1]);

		// Scores: right-aligned cells that contain no nested markup.
		List<Integer> scores = new ArrayList<>();
		for (Element cell : doc.select("td.align_right")) {
			if (!cell.html().contains(">")) {
				scores.add(Integer.parseInt(cell.text()));
			}
		}
		if (scores.size() < 2) {
			throw new IllegalArgumentException(
					"Expected at least two score cells but found "
							+ scores.size());
		}
		// The list is read as two halves: team 1's section scores followed by
		// its total (at index middle), then the same for team 2.
		int middle = scores.size() / 2 - 1;
		List<String> sectionScores = new ArrayList<>();
		for (int i = 0; i < middle; i++) {
			sectionScores.add(scores.get(i) + "-" + scores.get(i + middle + 1));
		}
		vo.setSectionScores(sectionScores);

		int team1Score = scores.get(middle);
		int team2Score = scores.get(scores.size() - 1);
		vo.setTeam1Score(team1Score);
		vo.setTeam2Score(team2Score);

		vo.setTeam1PlayersScore(getPlayerPerformance(doc, vo.getTeam1()));
		vo.setTeam2PlayersScore(getPlayerPerformance(doc, vo.getTeam2()));
		return vo;
	}

	/**
	 * Derives the season label, e.g. {@code "12-13"}, from a match date. A
	 * match in September or later belongs to the season starting that year;
	 * an earlier match belongs to the season that started the year before.
	 *
	 * @param dateMillis
	 *            match date in epoch milliseconds
	 * @return two-digit start year, a dash, two-digit end year
	 */
	private static String seasonOf(long dateMillis) {
		Date date = new Date(dateMillis);
		// Locale.US keeps the digits and calendar independent of the default
		// locale of the machine.
		int year = Integer.parseInt(
				new SimpleDateFormat("yyyy", Locale.US).format(date));
		int month = Integer.parseInt(
				new SimpleDateFormat("MM", Locale.US).format(date));
		int startYear = month > 8 ? year : year - 1;
		return String.format(Locale.US, "%02d-%02d", startYear % 100,
				(startYear + 1) % 100);
	}

	/**
	 * Reads the stat line of every player of one team from the table whose id
	 * is {@code <team>_basic}.
	 *
	 * @param doc
	 *            parsed box-score page
	 * @param team
	 *            team token used to build the table id
	 * @return one entry per player row; rows with a different number of values
	 *         and the "Team Totals" row are skipped
	 * @throws NumberFormatException
	 *             if a counted stat of an accepted row is not an integer
	 */
	private Set<PlayerScoreTableVo> getPlayerPerformance(Document doc,
			String team) {
		Set<PlayerScoreTableVo> players = new HashSet<>();
		Elements rows = doc.select("table#" + team + "_basic").select("tr");
		for (Element row : rows) {
			// Each cell contributes its text, immediately followed by the
			// absolute href of its first link when it has one.
			List<String> fields = new ArrayList<>();
			for (Element cell : row.getElementsByTag("td")) {
				fields.add(cell.text());
				Element link = cell.select("a[href]").first();
				if (link != null) {
					fields.add(link.attr("abs:href"));
				}
			}
			if (fields.size() != PLAYER_ROW_FIELD_COUNT
					|| "Team Totals".equals(fields.get(0))) {
				continue;
			}

			PlayerScoreTableVo onePlayer = new PlayerScoreTableVo();
			onePlayer.set球员名(fields.get(0));
			// Position is not scraped; index 1 is not read here (presumably
			// the player-page href from which the position could be fetched
			// - TODO confirm).
			onePlayer.set位置("");
			onePlayer.set在场时间(convertTimeToSecond(fields.get(2)));
			// Indices 5, 8 and 11 are not read (presumably the percentage
			// columns - TODO confirm), and neither is index 21.
			onePlayer.set投篮命中数(Integer.parseInt(fields.get(3)));
			onePlayer.set投篮出手数(Integer.parseInt(fields.get(4)));
			onePlayer.set三分命中数(Integer.parseInt(fields.get(6)));
			onePlayer.set三分出手数(Integer.parseInt(fields.get(7)));
			onePlayer.set罚球命中数(Integer.parseInt(fields.get(9)));
			onePlayer.set罚球出手数(Integer.parseInt(fields.get(10)));
			onePlayer.set进攻篮板数(Integer.parseInt(fields.get(12)));
			onePlayer.set防守篮板数(Integer.parseInt(fields.get(13)));
			onePlayer.set总篮板数(Integer.parseInt(fields.get(14)));
			onePlayer.set助攻数(Integer.parseInt(fields.get(15)));
			onePlayer.set抢断数(Integer.parseInt(fields.get(16)));
			onePlayer.set盖帽数(Integer.parseInt(fields.get(17)));
			onePlayer.set失误数(Integer.parseInt(fields.get(18)));
			onePlayer.set犯规数(Integer.parseInt(fields.get(19)));
			onePlayer.set个人得分(Integer.parseInt(fields.get(20)));
			players.add(onePlayer);
		}
		return players;
	}

	/**
	 * Converts a playing time to seconds. {@code "mm:ss"} is read as minutes
	 * and seconds; a value without a colon is read as whole minutes.
	 *
	 * @param time
	 *            playing time text
	 * @return the time in seconds, or 0 when the text is not numeric (the
	 *         failure is reported on standard error, not rethrown)
	 */
	private int convertTimeToSecond(String time) {
		try {
			int colon = time.indexOf(':');
			if (colon < 0) {
				return Integer.parseInt(time) * 60;
			}
			return Integer.parseInt(time.substring(0, colon)) * 60
					+ Integer.parseInt(time.substring(colon + 1));
		} catch (NumberFormatException e) {
			// Best effort: one bad cell must not abort the whole row.
			e.printStackTrace();
			return 0;
		}
	}

	/**
	 * Manual end-to-end check: downloads one box-score page, prints the parsed
	 * values of one player and saves the match through
	 * {@link DataPersistenceInDatabase}. It performs a real download and calls
	 * {@code dp.save(vo)}.
	 */
	@Test
	public void test() {
		HtmlParser parser = new HtmlParser();
		Downloader down = new Downloader();
		String baseUrl = "http://www.basketball-reference.com";
		MatchVo vo=parser.encapsulateMatchVo(
				down.download("http://www.basketball-reference.com/boxscores/201212190MEM.html"),
				baseUrl);
		System.out.println(vo.getDate());
		System.out.println(vo.getTeam2());
		vo.getTeam2PlayersScore().forEach(s->{
			if(s.get球员名().equals("Darrell Arthur")){
				System.out.println("三分出手数:"+s.get三分出手数());
				System.out.println("三分命中:"+s.get三分命中数());
				System.out.println("得分:"+s.get个人得分());
				System.out.println("位置:"+s.get位置());
				System.out.println("助攻:"+s.get助攻数());
				System.out.println("时间:"+s.get在场时间());
				System.out.println("失误:"+s.get失误数());
				System.out.println("篮板"+s.get总篮板数());
				System.out.println("出手:"+s.get投篮出手数());
				System.out.println("命中:"+s.get投篮命中数());
				System.out.println("抢断:"+s.get抢断数());
				System.out.println("犯规:"+s.get犯规数());
				System.out.println("盖帽:"+s.get盖帽数());
				System.out.println("罚球出手:"+s.get罚球出手数());
				System.out.println("罚球命中:"+s.get罚球命中数());
				System.out.println("进攻篮板:"+s.get进攻篮板数());
				System.out.println("防守篮板:"+s.get防守篮板数());
			}
		});
		DataPersistenceInDatabase dp=new DataPersistenceInDatabase();
		dp.save(vo);
	}

}
